pacman::p_load(rio, rafalib, pheatmap, dplyr, kableExtra, knitr, RColorBrewer,
               ETLUtils, pheatmap, NormalyzerDE, MSnbase, data.table, sva,
               ReactomeGSA, EnhancedVolcano, plotly, ggplot2, tidyr,
               ggpubr, purrr, gplots, mice, VIM, UpSetR)

1 Brief Information About Data

The data used in this analysis was part of my PhD project. The data is an output of MaxQuant, a tools use for quantitative analysis of large mass-spectrometric data. B cells that was transfected with Raft-Apex2 were activated with HEL, Fab2 at different time point. Followed by mass-spectrometry analysis.

Conditions compare in this analysis include:

  • Fab2 5min activated Vs non-activated
  • Fab2 10min activated Vs non-activated
  • Fab2 15min activated Vs non-activated
  • HEL 5min activated Vs non-activated
  • HEL 10min activated Vs non-activated
  • HEL 15min activated Vs non-activated
  • Fab2 5min activated Vs HEL 5min activated
  • Fab2 10min activated Vs HEL 10min activated
  • Fab2 15min activated Vs HEL 15min activated

2 Exploratory Data Analysis

2.1 Data quality check

# Read raw file
raw = read.delim("data/proteinGroups.txt", stringsAsFactors = FALSE,
                 colClasses = "character")
head(raw) %>% 
  kbl(caption = "Table") %>% 
  kable_paper(bootstrap_options = "striped", full_width = F) %>% 
  kable_styling(fixed_thead = T, full_width = FALSE) %>% 
  scroll_box()
Table
Protein.IDs Majority.protein.IDs Peptide.counts..all. Peptide.counts..razor.unique. Peptide.counts..unique. Protein.names Gene.names Fasta.headers Number.of.proteins Peptides Razor…unique.peptides Unique.peptides Peptides.10Fab2_1 Peptides.10Fab2_2 Peptides.10Fab2_3 Peptides.10HEL_1 Peptides.10HEL_2 Peptides.10HEL_3 Peptides.10NA_1 Peptides.10NA_2 Peptides.10NA_3 Peptides.15Fab2_1 Peptides.15Fab2_2 Peptides.15Fab2_3 Peptides.15HEL_1 Peptides.15HEL_2 Peptides.15HEL_3 Peptides.15NA_1 Peptides.15NA_2 Peptides.15NA_3 Peptides.5Fab2_1 Peptides.5Fab2_2 Peptides.5Fab2_3 Peptides.5HEL_1 Peptides.5HEL_2 Peptides.5HEL_3 Peptides.5NA_1 Peptides.5NA_2 Peptides.5NA_3 Peptides.NB_1 Peptides.NB_2 Peptides.NB_3 Razor…unique.peptides.10Fab2_1 Razor…unique.peptides.10Fab2_2 Razor…unique.peptides.10Fab2_3 Razor…unique.peptides.10HEL_1 Razor…unique.peptides.10HEL_2 Razor…unique.peptides.10HEL_3 Razor…unique.peptides.10NA_1 Razor…unique.peptides.10NA_2 Razor…unique.peptides.10NA_3 Razor…unique.peptides.15Fab2_1 Razor…unique.peptides.15Fab2_2 Razor…unique.peptides.15Fab2_3 Razor…unique.peptides.15HEL_1 Razor…unique.peptides.15HEL_2 Razor…unique.peptides.15HEL_3 Razor…unique.peptides.15NA_1 Razor…unique.peptides.15NA_2 Razor…unique.peptides.15NA_3 Razor…unique.peptides.5Fab2_1 Razor…unique.peptides.5Fab2_2 Razor…unique.peptides.5Fab2_3 Razor…unique.peptides.5HEL_1 Razor…unique.peptides.5HEL_2 Razor…unique.peptides.5HEL_3 Razor…unique.peptides.5NA_1 Razor…unique.peptides.5NA_2 Razor…unique.peptides.5NA_3 Razor…unique.peptides.NB_1 Razor…unique.peptides.NB_2 Razor…unique.peptides.NB_3 Unique.peptides.10Fab2_1 Unique.peptides.10Fab2_2 Unique.peptides.10Fab2_3 Unique.peptides.10HEL_1 Unique.peptides.10HEL_2 Unique.peptides.10HEL_3 Unique.peptides.10NA_1 Unique.peptides.10NA_2 Unique.peptides.10NA_3 Unique.peptides.15Fab2_1 Unique.peptides.15Fab2_2 Unique.peptides.15Fab2_3 Unique.peptides.15HEL_1 Unique.peptides.15HEL_2 Unique.peptides.15HEL_3 Unique.peptides.15NA_1 Unique.peptides.15NA_2 Unique.peptides.15NA_3 Unique.peptides.5Fab2_1 Unique.peptides.5Fab2_2 Unique.peptides.5Fab2_3 Unique.peptides.5HEL_1 Unique.peptides.5HEL_2 Unique.peptides.5HEL_3 Unique.peptides.5NA_1 Unique.peptides.5NA_2 Unique.peptides.5NA_3 Unique.peptides.NB_1 Unique.peptides.NB_2 Unique.peptides.NB_3 Sequence.coverage…. Unique…razor.sequence.coverage…. Unique.sequence.coverage…. Mol..weight..kDa. Sequence.length Sequence.lengths Fraction.average Fraction.1 Fraction.2 Fraction.3 Fraction.4 Fraction.11 Fraction.12 Fraction.13 Fraction.14 Fraction.21 Fraction.22 Fraction.23 Fraction.24 Fraction.31 Fraction.32 Fraction.33 Fraction.34 Fraction.41 Fraction.42 Fraction.43 Fraction.44 Fraction.51 Fraction.52 Fraction.53 Fraction.54 Fraction.61 Fraction.62 Fraction.63 Fraction.64 Fraction.71 Fraction.72 Fraction.73 Fraction.74 Fraction.81 Fraction.82 Fraction.83 Fraction.84 Fraction.91 Fraction.92 Fraction.93 Fraction.94 Q.value Score Identification.type.10Fab2_1 Identification.type.10Fab2_2 Identification.type.10Fab2_3 Identification.type.10HEL_1 Identification.type.10HEL_2 Identification.type.10HEL_3 Identification.type.10NA_1 Identification.type.10NA_2 Identification.type.10NA_3 Identification.type.15Fab2_1 Identification.type.15Fab2_2 Identification.type.15Fab2_3 Identification.type.15HEL_1 Identification.type.15HEL_2 Identification.type.15HEL_3 Identification.type.15NA_1 Identification.type.15NA_2 Identification.type.15NA_3 Identification.type.5Fab2_1 Identification.type.5Fab2_2 Identification.type.5Fab2_3 Identification.type.5HEL_1 Identification.type.5HEL_2 Identification.type.5HEL_3 Identification.type.5NA_1 Identification.type.5NA_2 Identification.type.5NA_3 Identification.type.NB_1 Identification.type.NB_2 Identification.type.NB_3 Sequence.coverage.10Fab2_1…. Sequence.coverage.10Fab2_2…. Sequence.coverage.10Fab2_3…. Sequence.coverage.10HEL_1…. Sequence.coverage.10HEL_2…. Sequence.coverage.10HEL_3…. Sequence.coverage.10NA_1…. Sequence.coverage.10NA_2…. Sequence.coverage.10NA_3…. Sequence.coverage.15Fab2_1…. Sequence.coverage.15Fab2_2…. Sequence.coverage.15Fab2_3…. Sequence.coverage.15HEL_1…. Sequence.coverage.15HEL_2…. Sequence.coverage.15HEL_3…. Sequence.coverage.15NA_1…. Sequence.coverage.15NA_2…. Sequence.coverage.15NA_3…. Sequence.coverage.5Fab2_1…. Sequence.coverage.5Fab2_2…. Sequence.coverage.5Fab2_3…. Sequence.coverage.5HEL_1…. Sequence.coverage.5HEL_2…. Sequence.coverage.5HEL_3…. Sequence.coverage.5NA_1…. Sequence.coverage.5NA_2…. Sequence.coverage.5NA_3…. Sequence.coverage.NB_1…. Sequence.coverage.NB_2…. Sequence.coverage.NB_3…. Intensity Intensity.10Fab2_1 Intensity.10Fab2_2 Intensity.10Fab2_3 Intensity.10HEL_1 Intensity.10HEL_2 Intensity.10HEL_3 Intensity.10NA_1 Intensity.10NA_2 Intensity.10NA_3 Intensity.15Fab2_1 Intensity.15Fab2_2 Intensity.15Fab2_3 Intensity.15HEL_1 Intensity.15HEL_2 Intensity.15HEL_3 Intensity.15NA_1 Intensity.15NA_2 Intensity.15NA_3 Intensity.5Fab2_1 Intensity.5Fab2_2 Intensity.5Fab2_3 Intensity.5HEL_1 Intensity.5HEL_2 Intensity.5HEL_3 Intensity.5NA_1 Intensity.5NA_2 Intensity.5NA_3 Intensity.NB_1 Intensity.NB_2 Intensity.NB_3 LFQ.intensity.10Fab2_1 LFQ.intensity.10Fab2_2 LFQ.intensity.10Fab2_3 LFQ.intensity.10HEL_1 LFQ.intensity.10HEL_2 LFQ.intensity.10HEL_3 LFQ.intensity.10NA_1 LFQ.intensity.10NA_2 LFQ.intensity.10NA_3 LFQ.intensity.15Fab2_1 LFQ.intensity.15Fab2_2 LFQ.intensity.15Fab2_3 LFQ.intensity.15HEL_1 LFQ.intensity.15HEL_2 LFQ.intensity.15HEL_3 LFQ.intensity.15NA_1 LFQ.intensity.15NA_2 LFQ.intensity.15NA_3 LFQ.intensity.5Fab2_1 LFQ.intensity.5Fab2_2 LFQ.intensity.5Fab2_3 LFQ.intensity.5HEL_1 LFQ.intensity.5HEL_2 LFQ.intensity.5HEL_3 LFQ.intensity.5NA_1 LFQ.intensity.5NA_2 LFQ.intensity.5NA_3 LFQ.intensity.NB_1 LFQ.intensity.NB_2 LFQ.intensity.NB_3 MS.MS.count.10Fab2_1 MS.MS.count.10Fab2_2 MS.MS.count.10Fab2_3 MS.MS.count.10HEL_1 MS.MS.count.10HEL_2 MS.MS.count.10HEL_3 MS.MS.count.10NA_1 MS.MS.count.10NA_2 MS.MS.count.10NA_3 MS.MS.count.15Fab2_1 MS.MS.count.15Fab2_2 MS.MS.count.15Fab2_3 MS.MS.count.15HEL_1 MS.MS.count.15HEL_2 MS.MS.count.15HEL_3 MS.MS.count.15NA_1 MS.MS.count.15NA_2 MS.MS.count.15NA_3 MS.MS.count.5Fab2_1 MS.MS.count.5Fab2_2 MS.MS.count.5Fab2_3 MS.MS.count.5HEL_1 MS.MS.count.5HEL_2 MS.MS.count.5HEL_3 MS.MS.count.5NA_1 MS.MS.count.5NA_2 MS.MS.count.5NA_3 MS.MS.count.NB_1 MS.MS.count.NB_2 MS.MS.count.NB_3 MS.MS.count Peptide.sequences Only.identified.by.site Reverse Potential.contaminant id Peptide.IDs Peptide.is.razor Mod..peptide.IDs Evidence.IDs MS.MS.IDs Best.MS.MS Biotinylation.by.BP.site.IDs Oxidation..M..site.IDs Biotinylation.by.BP.site.positions Oxidation..M..site.positions Taxonomy.IDs
A2A432 A2A432 24 24 16 Cullin-4B Cul4b sp|A2A432|CUL4B_MOUSE Cullin-4B OS=Mus musculus OX=10090 GN=Cul4b PE=1 SV=1 1 24 24 16 6 5 8 11 9 10 10 6 8 8 8 7 7 6 9 8 7 10 6 6 6 10 7 10 8 7 10 0 0 0 6 5 8 11 9 10 10 6 8 8 8 7 7 6 9 8 7 10 6 6 6 10 7 10 8 7 10 0 0 0 4 3 5 6 7 6 6 4 4 6 6 4 4 4 5 4 4 5 4 4 4 6 4 6 5 5 7 0 0 0 29 29 19.6 110.7 970 970 51.2 7 21 12 9 22 16 1 7 14 7 1 11 22 4 1 11 20 1 1 17 24 12 19 11 2 13 26 2 9 17 4 1 0 119.34 By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS 7.6 6.4 9.9 13 10.2 11.6 12.3 7.8 9.9 9.6 9.1 8.8 8.1 7.2 10.4 9.2 7.8 12.6 7.9 7.7 7.9 12.5 8.8 11.5 9.6 7.8 12 0 0 0 5250600000 169730000 105660000 301940000 270470000 140050000 176790000 425200000 139640000 253270000 393130000 145790000 190400000 114110000 132250000 175870000 303540000 67076000 221670000 177730000 57706000 71492000 295930000 60818000 211400000 214060000 56391000 378470000 0 0 0 34213000 17305000 57218000 38780000 27667000 38453000 34639000 9452900 36741000 56092000 34132000 34190000 36821000 32517000 43556000 24232000 31234000 41249000 43034000 27963000 26364000 46345000 20189000 34930000 46254000 49181000 32219000 0 0 0 4 0 5 6 2 2 3 1 4 5 1 4 3 2 6 2 1 7 5 1 1 5 1 3 7 0 5 0 0 0 86 AFGSTIVINPEK;AGNKEATDEELEK;AHIISDQK;DKENPNQYNYIA;EATDEELEK;EDSLDSVLFLK;ETVEEQASTTER;EVPEYLHHVNK;GKDIEDGDKFICNDDFK;HATGIEDGELRR;LITYLDQTTQK;LKHECGAAFTSK;LPENYTDETWQK;LYAAEGQK;QLLGEHLTAILQK;QYQIDAAIVR;RPNKPAELIAK;SATDGNTSTTPPTSAK;SIFLFLDR;SILISSVASVHHANGLAK;SSTAVSSFANSKPGSAK;TIDGILLLIER;TLQSLACGK;YNLEELYQAVENLCSHK 0 734;915;993;3585;4649;4791;6196;6334;8365;9402;14173;14233;15003;16199;18730;19317;19602;19849;20507;20532;21558;22609;22945;26942 True;True;True;True;True;True;True;True;True;True;True;True;True;True;True;True;True;True;True;True;True;True;True;True 761;947;1025;3713;4829;4976;6424;6570;8664;9725;14641;14703;15497;16723;19489;20094;20390;20643;21325;21350;22402;23488;23836;27966 18148;22531;22532;22533;22534;22535;22536;22537;22538;22539;22540;24723;24724;24725;24726;24727;24728;24729;24730;24731;24732;24733;24734;24735;24736;24737;24738;24739;24740;24741;24742;24743;24744;24745;24746;24747;24748;24749;83628;83629;106661;110177;110178;141090;141091;141092;141093;141094;141095;141096;141097;141098;141099;141100;141101;141102;141103;141104;141105;141106;141107;141108;141109;141110;141111;141112;141113;141114;141115;141116;141117;141118;141119;141120;141121;141122;141123;141124;141125;141126;141127;141128;141129;141130;141131;141132;141133;144187;144188;144189;144190;144191;144192;144193;144194;144195;144196;144197;144198;144199;144200;144201;144202;144203;144204;144205;144206;144207;196723;223498;223499;223500;223501;223502;223503;223504;223505;223506;223507;223508;223509;223510;223511;223512;223513;348775;348776;350579;350580;350581;368439;368440;368441;368442;368443;368444;368445;395692;395693;395694;395695;395696;395697;395698;395699;395700;395701;395702;395703;395704;395705;395706;395707;395708;395709;395710;395711;395712;395713;395714;395715;395716;395717;395718;395719;395720;395721;395722;395723;395724;395725;395726;395727;395728;395729;395730;458871;458872;458873;473873;473874;473875;473876;473877;473878;473879;473880;480812;480813;480814;480815;480816;480817;480818;480819;480820;480821;480822;480823;480824;480825;480826;480827;480828;480829;480830;480831;480832;480833;480834;480835;485931;503662;503663;503664;503665;503666;503667;503668;503669;503670;503671;503672;503673;503674;503675;503676;503677;503678;503679;503680;503681;503682;503683;503684;503685;503686;503687;503688;503689;503690;503691;503692;503693;504031;530381;530382;530383;530384;530385;530386;530387;530388;530389;530390;530391;530392;530393;530394;530395;530396;530397;530398;530399;530400;530401;530402;530403;530404;530405;530406;530407;530408;530409;530410;530411;530412;530413;530414;530415;530416;530417;530418;530419;530420;530421;530422;530423;530424;530425;530426;530427;530428;530429;530430;530431;530432;530433;530434;556562;565434;565435;565436;565437;565438;565439;565440;565441;565442;565443;565444;565445;565446;565447;664027;664028;664029;664030;664031;664032;664033;664034;664035;664036;664037;664038;664039;664040;664041;664042;664043;664044;664045;664046;664047;664048;664049;664050;664051;664052;664053;664054;664055;664056;664057 11855;14544;14545;14546;15807;15808;15809;15810;15811;15812;15813;55685;55686;70513;73000;92415;92416;92417;92418;92419;92420;92421;92422;92423;92424;92425;92426;92427;92428;92429;92430;92431;92432;94795;94796;94797;137324;156071;156072;156073;156074;244437;244438;245658;245659;257849;257850;257851;257852;275570;275571;275572;275573;275574;275575;275576;275577;275578;275579;275580;322449;332631;332632;332633;332634;336852;336853;336854;336855;336856;336857;336858;336859;336860;339841;353299;353300;353301;353302;353303;353304;353305;353306;353307;353308;353516;374853;374854;374855;374856;374857;374858;374859;374860;374861;374862;374863;374864;374865;374866;374867;374868;374869;374870;374871;374872;393892;399847;399848;399849;399850;399851;399852;399853;399854;466195;466196;466197;466198;466199;466200;466201;466202;466203;466204;466205;466206;466207;466208;466209;466210;466211;466212;466213;466214 11855;14545;15809;55685;70513;73000;92424;94796;137324;156073;244438;245659;257850;275573;322449;332631;336854;339841;353304;353516;374869;393892;399851;466214 10090
A2A5R2 A2A5R2 13 7 7 Brefeldin A-inhibited guanine nucleotide-exchange protein 2 Arfgef2 sp|A2A5R2|BIG2_MOUSE Brefeldin A-inhibited guanine nucleotide-exchange protein 2 OS=Mus musculus OX=10090 GN=Arfgef2 PE=1 SV=1 1 13 7 7 3 3 3 2 1 1 1 2 3 3 1 3 1 2 2 2 1 1 1 1 1 2 2 3 3 1 4 0 0 0 1 1 1 1 0 0 0 0 1 2 0 2 0 0 0 1 0 0 0 0 0 1 1 2 2 0 2 0 0 0 1 1 1 1 0 0 0 0 1 2 0 2 0 0 0 1 0 0 0 0 0 1 1 2 2 0 2 0 0 0 10 6.2 6.2 202.24 1792 1792 47.6 4 3 2 1 4 1 1 3 1 0 13.298 By matching By MS/MS By MS/MS By MS/MS By matching By matching By matching By matching By MS/MS By MS/MS By matching By MS/MS By matching By matching By matching By MS/MS By matching By matching By matching By matching By matching By matching By matching By MS/MS By MS/MS By matching By MS/MS 2.7 2.7 2.7 1.9 1 1 1 1.1 2.5 2.9 1 2.9 1 1.5 1.5 1.6 1 1 1 1 1 1.8 1.8 2.9 3 1 3.6 0 0 0 246760000 8126100 9581900 12528000 7617400 0 0 0 0 3787200 50923000 0 11513000 0 0 0 0 0 0 0 0 0 27476000 16592000 62866000 17064000 0 18683000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 1 0 0 0 5 ALMEAVSHAK;ANFIEADKYFLPFELACQSK;CIAQMVSSQAANIR;CISQLELAQLIGTGVK;DAYVQALAR;GLECLVSILK;IVETICNCFQGPQTDEGVQLQIIK;LDGNAIVDFVR;QETSSLACCLR;QYLCVALSK;TCYNIYLASK;VGCNPNEDVAIFAVDSLR;WLCAVSMDELASPHHPR 1 1443;1678;2840;2854;3193;8430;11804;13200;18377;19308;22152;24394;26242 True;True;True;True;False;False;True;False;True;False;False;False;True 1499;1752;2954;2968;3311;8729;12215;13649;19121;20085;23010;25349;27245 35395;35396;40472;40473;40474;40475;40476;40477;40478;67206;67207;67208;67209;67345;67346;67347;74776;74777;198311;198312;288174;288175;325329;325330;450634;473689;473690;544535;602576;602577;602578;602579;602580;602581;602582;602583;602584;602585;602586;602587;602588;602589;602590;602591;602592;602593;602594;602595;602596;602597;602598;602599;602600;602601;602602;602603;602604;602605;602606;602607;602608;602609;602610;602611;602612;602613;602614;602615;602616;602617;602618;647797 22844;26009;26010;26011;43747;43831;43832;49158;138405;204124;227606;227607;316461;332533;385254;425321;425322;425323;425324;425325;425326;425327;425328;425329;425330;425331;425332;425333;425334;425335;425336;425337;425338;425339;425340;425341;425342;425343;425344;453825 22844;26011;43747;43832;49158;138405;204124;227606;316461;332533;385254;425329;453825 10090
A2A791 A2A791 2 2 2 Zinc finger MYM-type protein 4 Zmym4 sp|A2A791|ZMYM4_MOUSE Zinc finger MYM-type protein 4 OS=Mus musculus OX=10090 GN=Zmym4 PE=1 SV=1 1 2 2 2 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 2 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 2 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 2 0 0 0 0 0 0 1.7 1.7 1.7 172.44 1549 1549 41.4 2 3 2 0.0030448 3.842 By matching By MS/MS By MS/MS By matching By MS/MS 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.7 1 1.7 0 0 0 0 0 0 78354000 5114800 0 9560100 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 35416000 3815800 24447000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 AVAPQQGLLDR;SNAVQDADSELKPFSK 2 2414;20998 True;True 2508;21833 57286;57287;517358;517359;517360;517361;517362 36804;366193;366194 36804;366193 10090
A2AAE1 A2AAE1 2 2 2 Uncharacterized protein KIAA1109 Kiaa1109 sp|A2AAE1|K1109_MOUSE Transmembrane protein KIAA1109 OS=Mus musculus OX=10090 GN=Kiaa1109 PE=1 SV=4 1 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0.5 0.5 0.5 555.36 5005 5005 47.7 1 2 0.0022727 4.4298 By MS/MS By matching By MS/MS 0 0 0 0 0 0 0 0 0 0 0 0 0.2 0 0.2 0 0 0.3 0 0 0 0 0 0 0 0 0 0 0 0 27456000 0 0 0 0 0 0 0 0 0 0 0 0 5984300 0 3333700 0 0 18138000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 GVVEETANNVDAGK;IGLQPAVLR 3 9274;10678 True;True 9595;11034 220412;256916;256917 154015;178320 154015;178320 10090
A2AAJ9 A2AAJ9 3 3 3 Obscurin Obscn sp|A2AAJ9|OBSCN_MOUSE Obscurin OS=Mus musculus OX=10090 GN=Obscn PE=1 SV=3 1 3 3 3 0 0 0 1 0 1 0 1 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 1 0 1 0 1 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 1 0 1 0 1 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 2.1 2.1 2.1 966.58 8886 8886 49.4 1 1 1 1 1 1 1 1 1 1 0.0014138 4.9187 By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS 0 0 0 0.1 0 0.1 0 0.1 0 0.1 0 0 0.1 0.1 0 0 0 0 0 0 0 0 0 0 0 0.1 1.9 0 0.1 0 974810000 0 0 0 399150000 0 48428000 0 0 0 224130000 0 0 41789000 175830000 0 0 0 0 0 0 0 0 0 0 0 19243000 28757000 0 37486000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 2 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 8 ADAGEYSCEAGGQK;ITPSEPQYSK;KEEATEGTMVTLR 4 381;11734;12094 True;True;True 393;12143;12511 8197;286124;286125;286126;286127;286128;286129;296569;296570;296571 4946;202873;202874;202875;202876;209575;209576;209577 4946;202876;209577 10090
A2AB59 A2AB59 3 3 3 Rho GTPase-activating protein 27 Arhgap27 sp|A2AB59|RHG27_MOUSE Rho GTPase-activating protein 27 OS=Mus musculus OX=10090 GN=Arhgap27 PE=1 SV=1 1 3 3 3 0 0 0 1 1 0 1 0 0 1 1 1 1 1 0 1 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 1 1 1 1 1 0 1 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 1 1 1 1 1 0 1 1 1 0 0 0 0 1 0 0 0 0 0 0 0 5.6 5.6 5.6 97.047 869 869 57.6 1 3 1 2 1 2 3 1 1 1 2 0 8.4069 By matching By MS/MS By MS/MS By MS/MS By matching By matching By matching By MS/MS By MS/MS By MS/MS By MS/MS By MS/MS 0 0 0 1.2 1.2 0 0.9 0 0 3.6 3.6 3.6 0.9 0.9 0 0.9 3.6 3.6 0 0 0 0 1.2 0 0 0 0 0 0 0 164660000 0 0 0 19601000 11259000 0 23271000 0 0 18397000 8924200 5405100 24449000 14005000 0 23050000 2859200 5382300 0 0 0 0 8055700 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 5 AIAEGISELSADLLQGEEGEPSSADFGSSER;ISGNLATIQK;TSAAGGLR 5 1032;11565;23303 True;True;True 1070;11970;24211 25669;25670;25671;25672;25673;25674;25675;281589;281590;281591;574296;574297;574298;574299;574300;574301;574302;574303 16284;16285;16286;199738;199739;405697;405698;405699;405700 16285;199739;405700 10090

Filter out Potential.contaminant, Reverse and Only.identified.by.site. Also select important features.

# Filter out contaminants hits
df = raw %>%
  filter(Potential.contaminant != "+") %>%
  filter(Reverse != "+") %>%
  filter(Only.identified.by.site != "+")


#select intensity columns
df <- dplyr::select(df,Majority.protein.IDs,Gene.names, Protein.names,Unique.peptides,
                   "Unique.peptides.NB_1", "Unique.peptides.NB_2", "Unique.peptides.NB_3",
                   "Unique.peptides.5NA_1", "Unique.peptides.5NA_2", "Unique.peptides.5NA_3",
                   "Unique.peptides.10NA_1", "Unique.peptides.10NA_2", "Unique.peptides.10NA_3",
                   "Unique.peptides.15NA_1", "Unique.peptides.15NA_2", "Unique.peptides.15NA_3",
                   "Unique.peptides.5HEL_1", "Unique.peptides.5HEL_2", "Unique.peptides.5HEL_3",
                   "Unique.peptides.10HEL_1", "Unique.peptides.10HEL_2", "Unique.peptides.10HEL_3",
                   "Unique.peptides.15HEL_1", "Unique.peptides.15HEL_2", "Unique.peptides.15HEL_3",
                   "Unique.peptides.5Fab2_1", "Unique.peptides.5Fab2_2", "Unique.peptides.5Fab2_3",
                   "Unique.peptides.10Fab2_1", "Unique.peptides.10Fab2_2", "Unique.peptides.10Fab2_3", 
                   "Unique.peptides.15Fab2_1", "Unique.peptides.15Fab2_2", "Unique.peptides.15Fab2_3",
                   "Intensity.NB_1", "Intensity.NB_2", "Intensity.NB_3",
                   "Intensity.5NA_1", "Intensity.5NA_2", "Intensity.5NA_3",
                   "Intensity.10NA_1", "Intensity.10NA_2", "Intensity.10NA_3",
                   "Intensity.15NA_1", "Intensity.15NA_2", "Intensity.15NA_3",
                   "Intensity.5HEL_1", "Intensity.5HEL_2", "Intensity.5HEL_3",
                   "Intensity.10HEL_1", "Intensity.10HEL_2", "Intensity.10HEL_3",
                   "Intensity.15HEL_1", "Intensity.15HEL_2", "Intensity.15HEL_3",
                   "Intensity.5Fab2_1", "Intensity.5Fab2_2", "Intensity.5Fab2_3",
                   "Intensity.10Fab2_1", "Intensity.10Fab2_2", "Intensity.10Fab2_3",
                   "Intensity.15Fab2_1", "Intensity.15Fab2_2", "Intensity.15Fab2_3",
                   Unique.peptides, Reverse, Potential.contaminant, Mol..weight..kDa.
)

Only proteins that are identified with unique peptides > 2 are considered.

#filter out  <2 unique peptide
df <- filter(df, Unique.peptides > 1) %>%
  arrange(Gene.names)

After MaxQuant run, 2636 proteins were identified.

print(nrow(raw) )
## [1] 2636

This was trimmed down to 2243 proteins after removing potential contaminants, proteins only identified by site, by reverse and proteins only identified with < 2 unique peptides.

#converts some columns to numeric
df[,4:67] <- sapply(df[,4:67], as.numeric)

#log transform intensity columns
df[,35:64] <- log2(df[,35:64])

#convert infinite to NA
df[mapply(is.infinite, df)] <- NA

df <- dplyr::select(df, Majority.protein.IDs, Gene.names, Protein.names, Unique.peptides,
                 "Intensity.NB_1", "Intensity.NB_2", "Intensity.NB_3",
                 "Intensity.5NA_1", "Intensity.5NA_2", "Intensity.5NA_3",
                 "Intensity.10NA_1", "Intensity.10NA_2", "Intensity.10NA_3",
                 "Intensity.15NA_1", "Intensity.15NA_2", "Intensity.15NA_3",
                 "Intensity.5HEL_1", "Intensity.5HEL_2", "Intensity.5HEL_3",
                 "Intensity.10HEL_1", "Intensity.10HEL_2", "Intensity.10HEL_3",
                 "Intensity.15HEL_1", "Intensity.15HEL_2", "Intensity.15HEL_3",
                 "Intensity.5Fab2_1", "Intensity.5Fab2_2", "Intensity.5Fab2_3",
                 "Intensity.10Fab2_1", "Intensity.10Fab2_2", "Intensity.10Fab2_3",
                 "Intensity.15Fab2_1", "Intensity.15Fab2_2", "Intensity.15Fab2_3",
                 Unique.peptides, Reverse, Potential.contaminant, Mol..weight..kDa.
)

In “Majority.protein.IDs” column, some of the protein IDs for a specific protein are more than one. These multiple IDs are separated by semicolon. I need to chose only the first ID and discard the ones after semicolon.

#chhose first ID before semicolon
df$Majority.protein.IDs <- as.character(df$Majority.protein.IDs)
df$Majority.protein.IDs <- sub(';.*',"", df$Majority.protein.IDs)

2.1.1 Intersections

Upset plot was then used to display intersection among different conditions of the identified proteins.

Non_Activated <- rep(1, length(df$Majority.protein.IDs))
Non_Activated[rowSums(is.na(df[8:16])) > 8] = 0

HEL_Act_5min <- rep(1, length(df$Majority.protein.IDs))
HEL_Act_5min[rowSums(is.na(df[17:19])) > 2] = 0

HEL_Act_10min <- rep(1, length(df$Majority.protein.IDs))
HEL_Act_10min[rowSums(is.na(df[20:22])) > 2] = 0

HEL_Act_15min <- rep(1, length(df$Majority.protein.IDs))
HEL_Act_15min[rowSums(is.na(df[23:25])) > 2] = 0

fab_Act_5min <- rep(1, length(df$Majority.protein.IDs))
fab_Act_5min[rowSums(is.na(df[26:28])) > 2] = 0

fab_Act_10min <- rep(1, length(df$Majority.protein.IDs))
fab_Act_10min[rowSums(is.na(df[29:31])) > 2] = 0

fab_Act_15min <- rep(1, length(df$Majority.protein.IDs))
fab_Act_15min[rowSums(is.na(df[32:34])) > 2] = 0

data_upset <- data.frame(df, Non_Activated, HEL_Act_5min, HEL_Act_10min, HEL_Act_15min,
                      fab_Act_5min, fab_Act_10min, fab_Act_15min) %>%
  dplyr::select(Gene.names, Majority.protein.IDs, Non_Activated, HEL_Act_5min,
                HEL_Act_10min, HEL_Act_15min, fab_Act_5min, fab_Act_10min, fab_Act_15min) %>%
  rename("Non-Activated"="Non_Activated", "HEL-5min"="HEL_Act_5min", "HEL-10min"="HEL_Act_10min",
         "HEL-15min"="HEL_Act_15min", "Fab2-5min"="fab_Act_5min", "Fab2-10min"="fab_Act_10min",
         "Fab2-15min"="fab_Act_15min")
upSet <- upset(data_upset, sets = c("Non-Activated", "Fab2-5min", "Fab2-10min", "Fab2-15min",
                              "HEL-5min", "HEL-10min", "HEL-15min"),
             keep.order= TRUE ,number.angles = 30, point.size = 2,
             line.size = 0.5,  mb.ratio = c(0.60, 0.40),order.by = "freq",
             mainbar.y.label = "Protein hits intersection", sets.x.label = "Protein hits per conditions",
             text.scale = c(1.3, 1.3, 1, 1, 2, 0.9))

upSet

Ensure that a protein has one intensity value per condition. Then ensure that the amount of NA per row is not greater that 14.

conf_df=subset(df,rowSums(is.na(df[8:16])) < 8 | rowSums(is.na(df[17:19])) < 2 |
               rowSums(is.na(df[20:22])) < 2 | rowSums(is.na(df[23:25])) < 2 |
               rowSums(is.na(df[26:28])) < 2 | rowSums(is.na(df[29:31])) < 2 |
               rowSums(is.na(df[32:34])) < 2 )#at least 2 in one condition



conf_df=subset(conf_df, rowSums(is.na(conf_df[8:34])) < 14)#1 out of 3

row.names(conf_df) = conf_df$Majority.protein.IDs
gene_name <- conf_df$Gene.names

#remove some columns
conf_df <- conf_df %>% dplyr::select(-c("Majority.protein.IDs", "Gene.names", "Protein.names", "Unique.peptides", 
                             "Intensity.NB_1", "Intensity.NB_2", "Intensity.NB_3", 
                             "Reverse", "Potential.contaminant", "Mol..weight..kDa."))
#rename columns
colnames(conf_df) <- c("5NA_1", "5NA_2", "5NA_3", "10NA_1", 
                    "10NA_2", "10NA_3", "15NA_1", "15NA_2", 
                    "15NA_3", "5HEL_1", "5HEL_2", "5HEL_3", 
                    "10HEL_1", "10HEL_2", "10HEL_3", 
                    "15HEL_1", "15HEL_2", "15HEL_3", 
                    "5Fab2_1", "5Fab2_2", "5Fab2_3", 
                    "10Fab2_1", "10Fab2_2", "10Fab2_3", 
                    "15Fab2_1", "15Fab2_2", "15Fab2_3")

my_label <- c("NA_", "NA_", "NA_", "NA_",
              "NA_", "NA_", "NA_", "NA_",
              "NA_", "5HEL", "5HEL", "5HEL",
              "10HEL", "10HEL", "10HEL",
              "15HEL", "15HEL", "15HEL",
              "5Fab2", "5Fab2", "5Fab2",
              "10Fab2", "10Fab2", "10Fab2",
              "15Fab2", "15Fab2", "15Fab2")

my_label2 <- c("5NA", "5NA", "5NA", "10NA",
              "10NA", "10NA", "15NA", "15NA",
              "15NA", "5HEL", "5HEL", "5HEL",
              "10HEL", "10HEL", "10HEL",
              "15HEL", "15HEL", "15HEL",
              "5Fab2", "5Fab2", "5Fab2",
              "10Fab2", "10Fab2", "10Fab2",
              "15Fab2", "15Fab2", "15Fab2")

# my_label2 <- c("5NA_1", "5NA_2", "5NA_3", "10NA_1", 
#                     "10NA_2", "10NA_3", "15NA_1", "15NA_2", 
#                     "15NA_3", "5HEL_1", "5HEL_2", "5HEL_3", 
#                     "10HEL_1", "10HEL_2", "10HEL_3", 
#                     "15HEL_1", "15HEL_2", "15HEL_3", 
#                     "5Fab2_1", "5Fab2_2", "5Fab2_3", 
#                     "10Fab2_1", "10Fab2_2", "10Fab2_3", 
#                     "15Fab2_1", "15Fab2_2", "15Fab2_3")

2.1.2 Check missing values

In total there are 27 conditions. To be less stringent, I had allowed max. 13 missing values per row. The plots below were used to evaluate the amount of missing values present in this data.


 #From VIM package aggr function helps rank the variables in decreasing order of missing values. 
# e.g variable ca has the highest amount of missing values.
mice_plot <- aggr(conf_df, col = c("green","blue"),
                    numbers=TRUE, sortVars = TRUE,
                    labels = names(conf_df), cex.axis = .7,
                    gap = 3, ylab = c("Missing data","Pattern")) # (VIM) display graphically missing values

## 
##  Variables sorted by number of missings: 
##  Variable      Count
##  10Fab2_2 0.17955615
##     5NA_2 0.17148621
##  10Fab2_1 0.15063887
##    15NA_2 0.14794889
##    10NA_2 0.14660390
##   5Fab2_2 0.13449899
##   10HEL_2 0.13382650
##  10Fab2_3 0.13248151
##   15HEL_1 0.12642905
##   5Fab2_1 0.12508406
##   5Fab2_3 0.12172159
##    10NA_1 0.11970410
##    5HEL_2 0.11634163
##    10NA_3 0.11096167
##     5NA_1 0.10692670
##   10HEL_1 0.10558171
##   10HEL_3 0.10356422
##    15NA_1 0.09885676
##    5HEL_1 0.09347680
##    15NA_3 0.09213181
##    5HEL_3 0.08944183
##     5NA_3 0.07868191
##   15HEL_2 0.07464694
##   15HEL_3 0.06792199
##  15Fab2_2 0.06523201
##  15Fab2_1 0.05716207
##  15Fab2_3 0.04841964

In the heat map on right side of the above plot, blue represents missing value while green represents non-missing value.

Note: I did not notice any striking pattern in the above evaluation of missing values across conditions. As a result i did not take any action regarding this.

2.2 Missing imputation and Normalization

Data was normalized using CycLoess (from NormalyzerDE) and missing values were imputed using kNN (nearest neighbor = 3). Both MDS plot and Hierarchical cluster plot were used to evaluate similarities among samples before and after importation.

2.2.1 MDS and Hierarchical cluster before normalization and imputation

myplclust(hclust(dist(t(conf_df))), labels = my_label2, 
          lab.col = as.fumeric(as.character(my_label2)))

MSD plot

group <- factor(as.character(my_label2))
d <- dist(t(conf_df))
mds <- cmdscale(d)
mypar()
plot(mds[,1], mds[,2], bg = as.numeric(group),pch = 21,
     xlab="First dimension",ylab="Second dimension")
#legend("bottomleft",levels(group), col=seq(along=levels(group)),pch=15, text.width = 9, ncol = 3)
text(mds[,1], mds[,2], labels = group, cex= 0.6, pos=2)

group <- factor(as.character(my_label2))
d <- dist(t(conf_df))
mds <- cmdscale(d, k = 4)
mypar()
plot(mds[,3], mds[,4],bg = as.numeric(group),pch=21,
     xlab="Third dimension",ylab="Forth dimension")
#legend("bottomleft",levels(group), col=seq(along=levels(group)),pch=15, text.width = 9, ncol = 3)
text(mds[,3], mds[,4], labels = group, cex= 0.6, pos=2)

From the above MSD plot, it seems the data clustered mostly based on days of experiments.

Note K was chosen using Elbow method.

#####Imputation kNN
conf_df$Uniprot = row.names(conf_df)
dat_impt=readMSnSet2(conf_df, ecol= 1:27, fnames = "Uniprot")#msnset mode

knn_imputation <- impute(dat_impt, "knn")#imputation
knn_imputation <- data.frame(knn_imputation)%>%t()%>%data.frame()#transpose
knn_imputation$Uniprot <- rownames(knn_imputation)#make rowname columns
#rio::export(knn_imputation, "data_imp.txt")

2.2.2 Hierarchical cluster after imputation

group <- factor(as.character(my_label2))
d <- dist(t(data_impt[, 1:27]))
mds <- cmdscale(d)
mypar()
plot(mds[,1],mds[,2],bg=as.numeric(group),pch=21,
     xlab="First dimension",ylab="Second dimension")
#legend("bottomleft",levels(group), col=seq(along=levels(group)),pch=15, text.width = 9, ncol = 3)
text(mds[,1],mds[,2], labels=group, cex= 0.6, pos=2)

After imputation, samples still seems to cluster based on days of experiments.

NormalyzerDE R package was used to select the best normalization method for this data. The best normalization method is CycLoess

#Normalization
dataFp <- "~/Desktop/APEX2_project2/data_imp.txt"
designFp <- "~/Desktop/APEX2_project2/design.txt"
outDir <- "~/Desktop/APEX2_project2"
normalyzer(jobName="quick_analysis", noLogTransform= TRUE, designPath=designFp, 
           dataPath=dataFp, outputDir=outDir)
## You are running version 1.10.0 of NormalyzerDE
## [Step 1/5] Load data and verify input
## Input data checked. All fields are valid.
## Sample check: More than one sample group found
## Sample replication check: All samples have replicates
## No RT column found, skipping RT processing
## [Step 1/5] Input verified, job directory prepared at:~/Desktop/APEX2_project2/quick_analysis
## [Step 2/5] Performing normalizations
## VSN normalization assumes non log-transformed data, as the option noLogTransform is specified it is assumed to not need log2-transformation and thus the VSN normalization is skipped
## No RT column specified (column named 'RT') or option not specified Skipping RT normalization.
## [Step 2/5] Done!
## [Step 3/5] Generating evaluation measures...
## [Step 3/5] Done!
## [Step 4/5] Writing matrices to file
## [Step 4/5] Matrices successfully written
## [Step 5/5] Generating plots...
## [Step 5/5] Plots successfully generated
## All done! Results are stored in: ~/Desktop/APEX2_project2/quick_analysis, processing time was 0.9 minutes

2.2.3 MDS and Hierarchical cluster after imputation and Normalization

norm_df <- import("~/Desktop/APEX2_project2/quick_analysis/CycLoess-normalized.txt")
myplclust(hclust(dist(t(norm_df[, 2:28]))), labels = my_label2, 
          lab.col = as.fumeric(as.character(my_label2)))

MDS plot

d <- dist(t(norm_df[, 2:28]))
mds <- cmdscale(d)
mypar()
plot(mds[,1],mds[,2],bg=as.numeric(group), pch=21,
     xlab="First dimension",ylab="Second dimension")
#legend("topleft",levels(group),col=seq(along=levels(group)),pch=15, text.width = 4, ncol = 3)
text(mds[,1],mds[,2], labels=group, cex= 0.6, pos=2)

d <- dist(t(norm_df[, 2:28]))
mds <- cmdscale(d, k = 4)
mypar()
plot(mds[,3],mds[,4],bg=as.numeric(group), pch=21,
     xlab="Third dimension",ylab="Forth dimension")
#legend("topleft",levels(group),col=seq(along=levels(group)),pch=15, text.width = 4, ncol = 3)
text(mds[,3],mds[,4], labels=group, cex= 0.6, pos=2)

After normalization, samples start to cluster based on replicates

2.3 Batch correction

To remove batch effect I tried 3 different methods, Linear regression and SVA.

2.3.1 Linear regression

design = read.delim("~/Desktop/APEX2_project2/design_batch.txt")
row.names(design) = design$sample

mod = model.matrix(~as.factor(group)+ as.factor(Batch),data=design)
fit = lm.fit(mod,t(norm_df[2:28]))


fitted_values <- fit[["fitted.values"]]
data_batchCor_linear <- t(fitted_values)
data_batchCor_linear <- data.frame(data_batchCor_linear)
data_batchCor_linear$Uniprot <- norm_df$Uniprot
myplclust(hclust(dist(t(data_batchCor_linear[,1:27]))), labels = my_label2, 
          lab.col = as.fumeric(as.character(my_label2)))

MDS plot

group <- factor(as.character(my_label))
d <- dist(t(data_batchCor_linear[,1:27]))       
mds <- cmdscale(d, k = 4)
mypar()
plot(mds[,1],mds[,2],bg=as.numeric(group),pch=21,
     xlab="First dimension",ylab="Second dimension")
text(mds[,1],mds[,2], labels=group, cex= 0.6, pos=2)

#legend("topright",levels(group),col=seq(along=levels(group)), pch=15, text.width = 4, ncol = 3)
#legend("topleft",levels(group),col=seq(along=levels(group)),pch=15, text.width = 4, ncol = 3)
group <- factor(as.character(my_label))
d <- dist(t(data_batchCor_linear[,1:27]))
mds <- cmdscale(d, k = 4)
mypar()
plot(mds[,3],mds[,4],bg= as.numeric(group),pch=21,
     xlab="Third dimension",ylab="forth dimension")
text(mds[,3],mds[,4], labels=group, cex= 0.6, pos = 2)

#legend("bottomright",levels(group),col=seq(along=levels(group)), pch=15, text.width = 4, ncol = 3)
#legend("topleft",levels(group),col=seq(along=levels(group)),pch=15, text.width = 4, ncol = 3)

2.3.2 SVA

row.names(norm_df) = norm_df$Uniprot
norm_df2 <- norm_df[, 2:28]
mod = model.matrix(~group,data = design)
mod0 = model.matrix(~1, data = design)
sva1 = sva(data.matrix(norm_df2), mod, mod0, n.sv = 2)
## Number of significant surrogate variables is:  2 
## Iteration (out of 5 ):1  2  3  4  5
summary(lm(sva1$sv ~ design$Batch))
## Response Y1 :
## 
## Call:
## lm(formula = Y1 ~ design$Batch)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.10635 -0.06122 -0.02987  0.02314  0.34310 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.40359    0.05262   7.670 5.02e-08 ***
## design$Batch -0.20180    0.02436  -8.284 1.24e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1033 on 25 degrees of freedom
## Multiple R-squared:  0.733,  Adjusted R-squared:  0.7223 
## F-statistic: 68.63 on 1 and 25 DF,  p-value: 1.236e-08
## 
## 
## Response Y2 :
## 
## Call:
## lm(formula = Y2 ~ design$Batch)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.22908 -0.14193 -0.06845  0.15396  0.48328 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  -0.18377    0.09378  -1.960   0.0613 .
## design$Batch  0.09189    0.04341   2.117   0.0444 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1842 on 25 degrees of freedom
## Multiple R-squared:  0.152,  Adjusted R-squared:  0.1181 
## F-statistic:  4.48 on 1 and 25 DF,  p-value: 0.04442
modsv = cbind(mod, sva1$sv)
fitsv = lm.fit(modsv, t(data.matrix(norm_df2)))

fitted_values <- fitsv[["fitted.values"]]
data_batchCor_sva <- t(fitted_values)
myplclust(hclust(dist(t(data_batchCor_sva))), labels = my_label2, lab.col = as.fumeric(as.character(my_label)))

MDS plot

group <- factor(as.character(my_label2))
d <- dist(t(data_batchCor_sva))
mds <- cmdscale(d, k = 4)
mypar()
plot(mds[,1],mds[,2],bg = as.numeric(group), pch = 21,
     xlab="First dimension",ylab="Second dimension")
text(mds[,1],mds[,2], labels=group, cex= 0.6, pos = 2)

#legend("topright",levels(group),col=seq(along=levels(group)), pch=15, text.width = 4, ncol = 3)
group <- factor(as.character(my_label2))
d <- dist(t(data_batchCor_sva))
mds <- cmdscale(d, k = 4)
mypar()
plot(mds[,3],mds[,4],bg=as.numeric(group),pch=21,
     xlab="Third dimension",ylab="Forth dimension")
text(mds[,3],mds[,4], labels=group, cex= 0.6, pos=2)

#legend("topleft",levels(group),col=seq(along=levels(group)), pch=15, text.width = 4, ncol = 3)
data_batchCor_sva <- data.frame(data_batchCor_sva)

data_batchCor_sva$Uniprot = row.names(data_batchCor_sva)

#rio::export(data_batchCor_sva, "data_batchCor_sva.txt")

I then performed differential expression analysis using Limma after batch correct.

sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
## 
## attached base packages:
##  [1] grid      stats4    parallel  stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] UpSetR_1.4.0           VIM_6.1.1              colorspace_2.0-3      
##  [4] mice_3.14.0            gplots_3.1.1           purrr_0.3.4           
##  [7] ggpubr_0.4.0           tidyr_1.2.0            plotly_4.10.0         
## [10] EnhancedVolcano_1.10.0 ggrepel_0.9.1          ggplot2_3.3.6         
## [13] ReactomeGSA_1.6.1      sva_3.40.0             BiocParallel_1.26.2   
## [16] genefilter_1.74.1      mgcv_1.8-38            nlme_3.1-153          
## [19] data.table_1.14.2      MSnbase_2.18.0         ProtGenerics_1.24.0   
## [22] S4Vectors_0.30.2       mzR_2.26.1             Rcpp_1.0.7            
## [25] Biobase_2.52.0         BiocGenerics_0.38.0    NormalyzerDE_1.10.0   
## [28] ETLUtils_1.5           ff_4.0.5               bit_4.0.4             
## [31] RColorBrewer_1.1-3     knitr_1.36             kableExtra_1.3.4      
## [34] dplyr_1.0.9            pheatmap_1.0.12        rafalib_1.0.0         
## [37] rio_0.5.29            
## 
## loaded via a namespace (and not attached):
##   [1] pacman_0.5.1                utf8_1.2.2                 
##   [3] tidyselect_1.1.2            RSQLite_2.2.9              
##   [5] AnnotationDbi_1.54.1        htmlwidgets_1.5.4          
##   [7] ranger_0.13.1               munsell_0.5.0              
##   [9] codetools_0.2-18            preprocessCore_1.54.0      
##  [11] withr_2.5.0                 highr_0.9                  
##  [13] ggalt_0.4.0                 rstudioapi_0.13            
##  [15] robustbase_0.93-9           vcd_1.4-9                  
##  [17] ggsignif_0.6.3              Rttf2pt1_1.3.9             
##  [19] mzID_1.30.0                 labeling_0.4.2             
##  [21] MatrixGenerics_1.4.3        GenomeInfoDbData_1.2.6     
##  [23] farver_2.1.1                bit64_4.0.5                
##  [25] vctrs_0.4.1                 generics_0.1.3             
##  [27] xfun_0.28                   R6_2.5.1                   
##  [29] doParallel_1.0.16           GenomeInfoDb_1.28.4        
##  [31] ggbeeswarm_0.6.0            clue_0.3-60                
##  [33] locfit_1.5-9.4              MsCoreUtils_1.4.0          
##  [35] bitops_1.0-7                cachem_1.0.6               
##  [37] DelayedArray_0.18.0         assertthat_0.2.1           
##  [39] scales_1.2.0                nnet_7.3-16                
##  [41] beeswarm_0.4.0              gtable_0.3.0               
##  [43] ash_1.0-15                  affy_1.70.0                
##  [45] sandwich_3.0-1              rlang_1.0.3                
##  [47] systemfonts_1.0.2           splines_4.1.0              
##  [49] rstatix_0.7.0               lazyeval_0.2.2             
##  [51] extrafontdb_1.0             impute_1.66.0              
##  [53] hexbin_1.28.2               broom_1.0.0                
##  [55] checkmate_2.0.0             BiocManager_1.30.18        
##  [57] yaml_2.2.1                  abind_1.4-5                
##  [59] backports_1.4.1             Hmisc_4.6-0                
##  [61] extrafont_0.17              tools_4.1.0                
##  [63] affyio_1.62.0               ellipsis_0.3.2             
##  [65] raster_3.5-2                jquerylib_0.1.4            
##  [67] proxy_0.4-26                plyr_1.8.6                 
##  [69] base64enc_0.1-3             zlibbioc_1.38.0            
##  [71] RCurl_1.98-1.5              rpart_4.1-15               
##  [73] zoo_1.8-9                   SummarizedExperiment_1.22.0
##  [75] haven_2.4.3                 cluster_2.1.2              
##  [77] magrittr_2.0.3              openxlsx_4.2.5             
##  [79] lmtest_0.9-39               pcaMethods_1.84.0          
##  [81] matrixStats_0.61.0          hms_1.1.1                  
##  [83] RcmdrMisc_2.7-1             evaluate_0.14              
##  [85] xtable_1.8-4                XML_3.99-0.10              
##  [87] jpeg_0.1-9                  readxl_1.3.1               
##  [89] IRanges_2.26.0              gridExtra_2.3              
##  [91] compiler_4.1.0              tibble_3.1.7               
##  [93] maps_3.4.0                  KernSmooth_2.23-20         
##  [95] ncdf4_1.19                  crayon_1.5.1               
##  [97] htmltools_0.5.2             Formula_1.2-4              
##  [99] DBI_1.1.1                   proj4_1.0-10.1             
## [101] MASS_7.3-54                 boot_1.3-28                
## [103] Matrix_1.3-4                car_3.0-12                 
## [105] cli_3.3.0                   vsn_3.60.0                 
## [107] GenomicRanges_1.44.0        forcats_0.5.1              
## [109] pkgconfig_2.0.3             laeken_0.5.2               
## [111] foreign_0.8-81              sp_1.4-6                   
## [113] terra_1.4-22                MALDIquant_1.21            
## [115] xml2_1.3.3                  foreach_1.5.1              
## [117] svglite_2.0.0               annotate_1.70.0            
## [119] vipor_0.4.5                 bslib_0.3.1                
## [121] webshot_0.5.2               XVector_0.32.0             
## [123] rvest_1.0.2                 stringr_1.4.0              
## [125] digest_0.6.29               Biostrings_2.60.2          
## [127] rmarkdown_2.11              cellranger_1.1.0           
## [129] htmlTable_2.3.0             nortest_1.0-4              
## [131] edgeR_3.34.1                curl_4.3.2                 
## [133] gtools_3.9.2                lifecycle_1.0.1            
## [135] jsonlite_1.7.2              carData_3.0-4              
## [137] viridisLite_0.4.0           limma_3.48.3               
## [139] fansi_1.0.3                 pillar_1.7.0               
## [141] lattice_0.20-45             DEoptimR_1.0-9             
## [143] ggrastr_1.0.1               KEGGREST_1.32.0            
## [145] fastmap_1.1.0               httr_1.4.2                 
## [147] survival_3.2-13             glue_1.6.2                 
## [149] zip_2.2.0                   png_0.1-7                  
## [151] iterators_1.0.13            class_7.3-19               
## [153] stringi_1.7.8               sass_0.4.0                 
## [155] blob_1.2.2                  latticeExtra_0.6-29        
## [157] caTools_1.18.2              memoise_2.0.1              
## [159] ape_5.5                     e1071_1.7-9